package com.chuangxin.data.chazidian;

import com.chuangxin.data.core.IDBasedFetcher;
import com.chuangxin.data.core.io.DataOutput;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Fetcher for pages addressed by {@code http://www.chazidian.com/kepu_${id}/}.
 * <p>
 * The id bounds (1 and 10069) and the URL template are handed to
 * {@link IDBasedFetcher}; how the base class walks that range and fills in the
 * {@code ${id}} placeholder is not visible in this file.
 * <p>
 * Created by Dawnwords on 2015/2/2.
 */
public class SWGWSMFetcher extends IDBasedFetcher {
    private static final String URL_TEMPLATE = "http://www.chazidian.com/kepu_${id}/";
    private static final int FIRST_ID = 1;
    private static final int LAST_ID = 10069;

    /** CSS query for the article heading. */
    private static final String TITLE_QUERY = "div.article_title > h1";
    /** CSS query for the container(s) holding the article body. */
    private static final String BODY_QUERY = "div.article_detail";
    /** CSS query for the block stripped from the body (presumably the pager - confirm against a live page). */
    private static final String PAGER_QUERY = "p#pages";

    /**
     * @param output sink passed straight through to the {@link IDBasedFetcher} constructor
     */
    public SWGWSMFetcher(DataOutput output) {
        super(output, FIRST_ID, LAST_ID, URL_TEMPLATE);
    }

    /**
     * Pulls the title and body markup out of a parsed page.
     * <p>
     * Note that this mutates {@code doc}: every {@code p#pages} element found
     * under a {@code div.article_detail} is removed from the DOM before the body
     * markup is read.
     *
     * @param doc parsed page to extract from
     * @param url not used by this implementation
     * @return the inner HTML of the first {@code div.article_title > h1} as title and
     *         the inner HTML of the first {@code div.article_detail} as content
     */
    @Override
    protected TitleContent processDoc(Document doc, String url) {
        // Read the title before touching the DOM.
        Elements heading = doc.select(TITLE_QUERY).eq(0);
        String title = heading.html();

        // Strip the unwanted block from every matched container, then keep only the first one.
        Elements body = doc.select(BODY_QUERY);
        body.select(PAGER_QUERY).remove();
        Elements firstBody = body.eq(0);

        return new TitleContent(title, firstBody.html());
    }
}
